Analysis Dependencies

library(ggplot2)   # (Wickham, 2016)
library(tidyr)     # (Wickham and Henry, 2020)
library(dplyr)     # (Wickham et al., 2020)
library(reshape2)  # (Wickham, 2007)
library(cowplot)   # (Wilke, 2019)
library(patchwork) # (Pederson, 2020)
library(viridis)   # (Garnier, 2018)
library(hexbin)

We conducted these analyses using the following computing environment:

print(version)
##                _                           
## platform       x86_64-apple-darwin15.6.0   
## arch           x86_64                      
## os             darwin15.6.0                
## system         x86_64, darwin15.6.0        
## status                                     
## major          3                           
## minor          6.2                         
## year           2019                        
## month          12                          
## day            12                          
## svn rev        77560                       
## language       R                           
## version.string R version 3.6.2 (2019-12-12)
## nickname       Dark and Stormy Night

Setup

data_path <- "./data/agg_data.csv"
agg_data <- read.csv(data_path, na.strings="NONE")

agg_data$BIT_FLIP_PROB <- as.factor(agg_data$BIT_FLIP_PROB)
agg_data$DRIFT <- agg_data$TOURNAMENT_SIZE==1

chg_rate_label <- function(mag, interval, drift) {
  if (drift) { return("drift") }
  else if (interval == 0) { return("0") }
  else { return(paste(mag, interval, sep="/")) }
}

agg_data$chg_rate_label <- factor(mapply(chg_rate_label, 
                                            agg_data$CHANGE_MAGNITUDE,
                                            agg_data$CHANGE_FREQUENCY,
                                            agg_data$DRIFT),
                                     levels=c("drift", "0", "1/256", "1/128",
                                              "1/64", "1/32", "1/16", "1/8",
                                              "1/4", "1/2", "1/1", "2/1", 
                                              "4/1", "8/1", "16/1", "32/1",
                                              "64/1", "128/1", "256/1", 
                                              "512/1", "1024/1", "2048/1",
                                              "4096/1"))
theme_set(theme_cowplot())

data_nk_phase0 <- 
  filter(agg_data, GRADIENT_MODEL==0 & evo_phase == 0 & update==50000)
data_nk_phase1 <- 
  filter(agg_data, GRADIENT_MODEL==0 & evo_phase == 1 & update==60000)
data_gradient_phase0 <- 
  filter(agg_data, GRADIENT_MODEL==1 & evo_phase == 0 & update==50000)
data_gradient_phase1 <- 
  filter(agg_data, GRADIENT_MODEL==1 & evo_phase == 1 & update==60000)

Evolution Experiment

Fitness

Gradient Fitness Model

ggplot(data_gradient_phase0, 
       aes(x=BIT_FLIP_PROB, y=fitness, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Fitness") +
  facet_wrap(~ chg_rate_label) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

NK Fitness Model

ggplot(data_nk_phase0, 
       aes(x=BIT_FLIP_PROB, y=fitness, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Fitness") +
  facet_wrap(~ chg_rate_label) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

Coding Sites

Gradient Fitness Model

ggplot(data_gradient_phase0, 
       aes(x=BIT_FLIP_PROB, y=coding_sites, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Coding Sites (in best organism)",
                     limits=c(0, 130),
                     breaks=seq(0, 130, 20)) +
  facet_wrap(~ chg_rate_label) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

chg_rates <- levels(data_gradient_phase0$chg_rate_label)
mut_rates <- levels(data_gradient_phase0$BIT_FLIP_PROB)
for (chg_rate in chg_rates) {
  df <- filter(data_gradient_phase0, chg_rate_label==chg_rate)
  if (length(df$update) == 0) { next(); }
  print(paste("===== Change rate: ", chg_rate, " =====", sep=""))
  for (mut_rate in mut_rates) {
    mr <- filter(df, BIT_FLIP_PROB==mut_rate)
    if (length(mr$update) == 0) { next(); }
    med <- median(mr$coding_sites)
    print(paste("Median for", mut_rate, ":", med))
  }
  kt <- kruskal.test(formula = coding_sites ~ BIT_FLIP_PROB, data=df)
  print(kt)
  if (kt$p.value <= 0.05) {
    wt <- pairwise.wilcox.test(x=df$coding_sites,
                             g=df$BIT_FLIP_PROB,
                             exact=FALSE,
                             p.adjust.method = "bonferroni")
    print(wt)
  }
}
## [1] "===== Change rate: drift ====="
## [1] "Median for 3e-04 : 81.5"
## [1] "Median for 0.001 : 75.5"
## [1] "Median for 0.003 : 69.5"
## [1] "Median for 0.01 : 64.5"
## [1] "Median for 0.03 : 71"
## [1] "Median for 0.1 : 75.5"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 9.8694, df = 5, p-value = 0.07902
## 
## [1] "===== Change rate: 0 ====="
## [1] "Median for 3e-04 : 78"
## [1] "Median for 0.001 : 76"
## [1] "Median for 0.003 : 76"
## [1] "Median for 0.01 : 72.5"
## [1] "Median for 0.03 : 67"
## [1] "Median for 0.1 : 28"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 340.2, df = 5, p-value < 2.2e-16
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df$coding_sites and df$BIT_FLIP_PROB 
## 
##       3e-04   0.001   0.003   0.01    0.03   
## 0.001 1.0000  -       -       -       -      
## 0.003 0.1582  1.0000  -       -       -      
## 0.01  5.7e-07 0.0005  0.0622  -       -      
## 0.03  < 2e-16 < 2e-16 7.7e-13 3.8e-08 -      
## 0.1   < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 
## P value adjustment method: bonferroni 
## [1] "===== Change rate: 1/128 ====="
## [1] "Median for 3e-04 : 124"
## [1] "Median for 0.001 : 123"
## [1] "Median for 0.003 : 119"
## [1] "Median for 0.01 : 110"
## [1] "Median for 0.03 : 89"
## [1] "Median for 0.1 : 39"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 476.71, df = 5, p-value < 2.2e-16
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df$coding_sites and df$BIT_FLIP_PROB 
## 
##       3e-04   0.001   0.003   0.01    0.03   
## 0.001 1       -       -       -       -      
## 0.003 1.1e-09 5.9e-08 -       -       -      
## 0.01  < 2e-16 < 2e-16 2.3e-12 -       -      
## 0.03  < 2e-16 < 2e-16 < 2e-16 < 2e-16 -      
## 0.1   < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 
## P value adjustment method: bonferroni 
## [1] "===== Change rate: 1/4 ====="
## [1] "Median for 3e-04 : 127"
## [1] "Median for 0.001 : 128"
## [1] "Median for 0.003 : 128"
## [1] "Median for 0.01 : 127"
## [1] "Median for 0.03 : 115.5"
## [1] "Median for 0.1 : 65"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 400.32, df = 5, p-value < 2.2e-16
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df$coding_sites and df$BIT_FLIP_PROB 
## 
##       3e-04  0.001  0.003  0.01   0.03  
## 0.001 0.685  -      -      -      -     
## 0.003 0.012  1.000  -      -      -     
## 0.01  1.000  1.000  0.023  -      -     
## 0.03  <2e-16 <2e-16 <2e-16 <2e-16 -     
## 0.1   <2e-16 <2e-16 <2e-16 <2e-16 <2e-16
## 
## P value adjustment method: bonferroni 
## [1] "===== Change rate: 4/1 ====="
## [1] "Median for 3e-04 : 116"
## [1] "Median for 0.001 : 111.5"
## [1] "Median for 0.003 : 111"
## [1] "Median for 0.01 : 104"
## [1] "Median for 0.03 : 104"
## [1] "Median for 0.1 : 97"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 125.36, df = 5, p-value < 2.2e-16
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df$coding_sites and df$BIT_FLIP_PROB 
## 
##       3e-04   0.001   0.003   0.01    0.03   
## 0.001 0.02862 -       -       -       -      
## 0.003 0.00794 1.00000 -       -       -      
## 0.01  7.6e-09 0.00016 0.02481 -       -      
## 0.03  7.4e-10 5.5e-05 0.01170 1.00000 -      
## 0.1   < 2e-16 4.4e-14 1.0e-09 0.00279 0.00486
## 
## P value adjustment method: bonferroni

NK Fitness Model

ggplot(data_nk_phase0, 
       aes(x=BIT_FLIP_PROB, y=coding_sites, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Coding Sites (in best organism)",
                     limits=c(0, 130),
                     breaks=seq(0, 130, 20)) +
  facet_wrap(~ chg_rate_label) +
  ggtitle("NK Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

chg_rates <- levels(data_nk_phase0$chg_rate_label)
mut_rates <- levels(data_nk_phase0$BIT_FLIP_PROB)
for (chg_rate in chg_rates) {
  df <- filter(data_nk_phase0, chg_rate_label==chg_rate)
  if (length(df$update) == 0) { next(); }
  print(paste("===== Change rate: ", chg_rate, " =====", sep=""))
  for (mut_rate in mut_rates) {
    mr <- filter(df, BIT_FLIP_PROB==mut_rate)
    if (length(mr$update) == 0) { next(); }
    med <- median(mr$coding_sites)
    print(paste("Median for", mut_rate, ":", med))
  }
  kt <- kruskal.test(formula = coding_sites ~ BIT_FLIP_PROB, data=df)
  print(kt)
  if (kt$p.value <= 0.05) {
    wt <- pairwise.wilcox.test(x=df$coding_sites,
                             g=df$BIT_FLIP_PROB,
                             exact=FALSE,
                             p.adjust.method = "bonferroni")
    print(wt)
  }
}
## [1] "===== Change rate: drift ====="
## [1] "Median for 3e-04 : 77.5"
## [1] "Median for 0.001 : 78.5"
## [1] "Median for 0.003 : 76.5"
## [1] "Median for 0.01 : 69.5"
## [1] "Median for 0.03 : 72"
## [1] "Median for 0.1 : 69"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 2.3393, df = 5, p-value = 0.8005
## 
## [1] "===== Change rate: 0 ====="
## [1] "Median for 3e-04 : 77"
## [1] "Median for 0.001 : 76"
## [1] "Median for 0.003 : 75.5"
## [1] "Median for 0.01 : 73"
## [1] "Median for 0.03 : 60"
## [1] "Median for 0.1 : 21"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 404.75, df = 5, p-value < 2.2e-16
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df$coding_sites and df$BIT_FLIP_PROB 
## 
##       3e-04  0.001  0.003  0.01   0.03  
## 0.001 1.0000 -      -      -      -     
## 0.003 1.0000 1.0000 -      -      -     
## 0.01  0.0093 0.1669 0.3626 -      -     
## 0.03  <2e-16 <2e-16 <2e-16 <2e-16 -     
## 0.1   <2e-16 <2e-16 <2e-16 <2e-16 <2e-16
## 
## P value adjustment method: bonferroni 
## [1] "===== Change rate: 2/1 ====="
## [1] "Median for 3e-04 : 117"
## [1] "Median for 0.001 : 117"
## [1] "Median for 0.003 : 115.5"
## [1] "Median for 0.01 : 113"
## [1] "Median for 0.03 : 64.5"
## [1] "Median for 0.1 : 27"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 423.99, df = 5, p-value < 2.2e-16
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df$coding_sites and df$BIT_FLIP_PROB 
## 
##       3e-04  0.001  0.003  0.01   0.03  
## 0.001 1.0000 -      -      -      -     
## 0.003 1.0000 1.0000 -      -      -     
## 0.01  0.0075 0.0022 0.4187 -      -     
## 0.03  <2e-16 <2e-16 <2e-16 <2e-16 -     
## 0.1   <2e-16 <2e-16 <2e-16 <2e-16 <2e-16
## 
## P value adjustment method: bonferroni 
## [1] "===== Change rate: 64/1 ====="
## [1] "Median for 3e-04 : 123"
## [1] "Median for 0.001 : 123"
## [1] "Median for 0.003 : 123"
## [1] "Median for 0.01 : 120.5"
## [1] "Median for 0.03 : 75"
## [1] "Median for 0.1 : 13"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 423.98, df = 5, p-value < 2.2e-16
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df$coding_sites and df$BIT_FLIP_PROB 
## 
##       3e-04   0.001   0.003   0.01    0.03   
## 0.001 1.00000 -       -       -       -      
## 0.003 1.00000 1.00000 -       -       -      
## 0.01  0.00028 0.00254 0.05478 -       -      
## 0.03  < 2e-16 < 2e-16 < 2e-16 < 2e-16 -      
## 0.1   < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 
## P value adjustment method: bonferroni 
## [1] "===== Change rate: 1024/1 ====="
## [1] "Median for 3e-04 : 108"
## [1] "Median for 0.001 : 102"
## [1] "Median for 0.003 : 97.5"
## [1] "Median for 0.01 : 97.5"
## [1] "Median for 0.03 : 91.5"
## [1] "Median for 0.1 : 16"
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by BIT_FLIP_PROB
## Kruskal-Wallis chi-squared = 268.94, df = 5, p-value < 2.2e-16
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df$coding_sites and df$BIT_FLIP_PROB 
## 
##       3e-04   0.001   0.003   0.01    0.03   
## 0.001 0.021   -       -       -       -      
## 0.003 3.3e-06 0.485   -       -       -      
## 0.01  3.2e-06 0.417   1.000   -       -      
## 0.03  8.8e-13 9.9e-06 0.024   0.105   -      
## 0.1   < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 
## P value adjustment method: bonferroni

Genome Length

Gradient Fitness Model

ggplot(data_gradient_phase0, 
       aes(x=BIT_FLIP_PROB, y=genome_length, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Genome Length (in best organism)",
                     limits=c(0, 1025)) +
  facet_wrap(~ chg_rate_label) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

NK Fitness Model

ggplot(data_nk_phase0, 
       aes(x=BIT_FLIP_PROB, y=genome_length, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Genome Length (in best organism)",
                     limits=c(0, 1025)) +
  facet_wrap(~ chg_rate_label) +
  ggtitle("NK Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

Neutral Sites

Gradient Fitness Model

ggplot(data_gradient_phase0, 
       aes(x=BIT_FLIP_PROB, y=neutral_sites, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Neutral Sites (in best organism)") +
  facet_wrap(~ chg_rate_label) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

NK Fitness Model

ggplot(data_nk_phase0, 
       aes(x=BIT_FLIP_PROB, y=neutral_sites, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Neutral Sites (in best organism)") +
  facet_wrap(~ chg_rate_label) +
  ggtitle("NK Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

Transfer evolution experiment

Fitness

Gradient Fitness Model

ggplot(data_gradient_phase1, 
       aes(x=BIT_FLIP_PROB, y=fitness, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Fitness") +
  facet_wrap(~ chg_rate_label) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

NK Fitness Model

ggplot(data_nk_phase1, 
       aes(x=BIT_FLIP_PROB, y=fitness, color=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Rate") +
  scale_y_continuous(name="Fitness") +
  facet_wrap(~ chg_rate_label) +
  ggtitle("NK Fitness Model") +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90))

Fitness vs Coding Sites

Gradient Fitness Model

ggplot(data_gradient_phase1, 
       aes(x=coding_sites, y=fitness, color=chg_rate_label)) +
  geom_jitter() +
  xlab("Coding Sites") +
  scale_y_continuous(name="Fitness") +
  facet_wrap(~ BIT_FLIP_PROB) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position = "bottom")

NK Fitness Model

ggplot(data_nk_phase1, 
       aes(x=coding_sites, y=fitness, color=chg_rate_label)) +
  geom_jitter() +
  xlab("Coding Sites") +
  scale_y_continuous(name="Fitness") +
  facet_wrap(~ BIT_FLIP_PROB) +
  ggtitle("NK Fitness Model") +
  theme(legend.position = "bottom")

Publication Figures

High mutation rate promotes gene overlap

rates = c("drift", "0", "1/4")
labels <- c(
  "drift"="Drift",
  "0"="Static",
  "1/4"="Changing Env. (1/4)"
)
ggplot(filter(data_gradient_phase0, chg_rate_label %in% rates), 
       aes(x=BIT_FLIP_PROB, y=coding_sites, fill=BIT_FLIP_PROB)) +
  geom_boxplot() +
  xlab("Bit Flip Mutation Rate") +
  scale_y_continuous(name="Coding Sites\n(in best organism)",
                     limits=c(0, 130),
                     breaks=seq(0, 130, 20)) +
  facet_wrap(~ chg_rate_label, labeller = labeller(chg_rate_label=labels)) +
  theme(legend.position="none",
        axis.text.x=element_text(angle=90)) +
  ggsave("./imgs/coding-sites-v-mutation-gradient-phase0.pdf", width=10, height=7)

Does gene target similarity (gradient fitness model) predict overlap between two genes?

env_sim_data_path <- "./data/env_sim_gene_overlap.csv"
env_sim_data <- read.csv(env_sim_data_path, na.strings="NONE")

env_sim_data <- filter(env_sim_data, GRADIENT_MODEL==1)
env_sim_data$BIT_FLIP_PROB <- as.factor(env_sim_data$BIT_FLIP_PROB)
env_sim_data$DRIFT <- env_sim_data$TOURNAMENT_SIZE==1

env_sim_data$chg_rate_label <- factor(mapply(chg_rate_label, 
                                            env_sim_data$CHANGE_MAGNITUDE,
                                            env_sim_data$CHANGE_FREQUENCY,
                                            env_sim_data$DRIFT),
                                     levels=c("drift", "0", "1/256", "1/128",
                                              "1/64", "1/32", "1/16", "1/8",
                                              "1/4", "1/2", "1/1", "2/1", 
                                              "4/1", "8/1", "16/1", "32/1",
                                              "64/1", "128/1", "256/1", 
                                              "512/1", "1024/1", "2048/1",
                                              "4096/1"))

env_sim_data$gene_pair_target_similarity_factor <-
  as.factor(env_sim_data$gene_pair_target_similarity)
env_sim_data$gene_pair_overlap_factor <- 
  as.factor(env_sim_data$gene_pair_overlap)
env_sim_data$gene_pair_target_max_alignment_similarity_factor <-
  as.factor(env_sim_data$gene_pair_target_max_alignment_similarity)

env_sim_data_selection <- filter(env_sim_data, TOURNAMENT_SIZE==8)
env_sim_data_drift <- filter(env_sim_data, TOURNAMENT_SIZE==1)

env_sim_data_selection_static <- filter(env_sim_data_selection, chg_rate_label=="0")

Gene Target Similarity Frequencies

We’ll look at just the static environment.

Naive Hamming Similarity

For each replicate, we measure the pairwise gene target similarity (1 - Hamming Distance). I.e., we measure the number of bits that match between each pair of gene targets (within a run).

These measures are for naively aligned gene targets.

# Occurences of environment similarity (static environment)
a <- 
ggplot(env_sim_data_selection_static,
       aes(x=gene_pair_target_similarity_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  ylim(0, 6000) +
  xlab("Pairwise gene target similarity") + 
  facet_wrap(~BIT_FLIP_PROB) +
  ggtitle("Selection")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
b <-
ggplot(env_sim_data_drift, aes(x=gene_pair_target_similarity_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  ylim(0, 6000) +
  xlab("Pairwise gene target similarity") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Drift")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
(a | b)

As expected, no differences between drift and selection conditions (no reason to expect there to be a difference).

Best-alignment Hamming Similarity

Here, for each pair of gene targets (within a run), we measure the best hamming similarity across all possible alignments of two gene targets.

# Occurences of environment similarities (given max alignment)
a <- 
ggplot(env_sim_data_selection_static,
       aes(x=gene_pair_target_max_alignment_similarity_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  ylim(0, 6000) +
  xlab("Pairwise gene target similarity\n(best alignment)") + 
  facet_wrap(~BIT_FLIP_PROB) +
  ggtitle("Selection")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
b <-
ggplot(env_sim_data_drift, 
       aes(x=gene_pair_target_max_alignment_similarity_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene target similarity\n(best_alignment)") +
  ylim(0, 6000) +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Drift")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
(a | b)

Using the best-alignment, shifts the distribution of observed gene target similarities to the right.

Gene Overlap Frequencies

For each evolved genetic architecture, we measure the pairwise gene overlap among each of the 16 genes.

Drift

ggplot(filter(env_sim_data_drift), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Drift")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

ggplot(filter(env_sim_data_drift, gene_pair_overlap>0), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap (excluding 0)") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Drift")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Static Environment

ggplot(env_sim_data_selection_static, 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Static Environment")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

ggplot(filter(env_sim_data_selection_static, gene_pair_overlap>0), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap (excluding 0)") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Static Environment")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Changing Environment

1 change every 128 generations (1/128)
ggplot(filter(env_sim_data_selection, chg_rate_label=="1/128"), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Changing Environment (1/128)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

ggplot(filter(env_sim_data_selection, chg_rate_label=="1/128" & gene_pair_overlap > 0), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap (excluding 0)") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Changing Environment (1/128)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

1 change every 4 generations (1/4)
ggplot(filter(env_sim_data_selection, chg_rate_label=="1/4"), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Changing Environment (1/4)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

ggplot(filter(env_sim_data_selection, chg_rate_label=="1/4" & gene_pair_overlap > 0), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap (excluding 0)") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Changing Environment (1/4)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

4 changes every generation (4/1)
ggplot(filter(env_sim_data_selection, chg_rate_label=="4/1"), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Changing Environment (4/1)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

ggplot(filter(env_sim_data_selection, chg_rate_label=="4/1" & gene_pair_overlap > 0), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap (excluding 0)") +
  facet_wrap(~BIT_FLIP_PROB) + 
  ggtitle("Changing Environment (4/1)")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

All Together

ggplot(env_sim_data, 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap") +
  facet_grid(BIT_FLIP_PROB~chg_rate_label)
## Warning: Ignoring unknown parameters: binwidth, bins, pad

ggplot(filter(env_sim_data, gene_pair_overlap>0), 
       aes(x=gene_pair_overlap_factor)) +
  geom_histogram(stat="count") +
  ylab("Occurences") +
  xlab("Pairwise gene overlap (excluding 0)") +
  facet_grid(BIT_FLIP_PROB~chg_rate_label)
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Pairwise Gene Target Similarity (best-alignment) Vs. Pairwise Gene Overlap (un-normalized)

Does best-alignment gene target similarity predict gene overlap amount?

Drift

ggplot(env_sim_data_drift,
       aes(x=gene_pair_target_max_alignment_similarity, 
           y=gene_pair_overlap)) +
  geom_bin2d(binwidth=1) +
  scale_fill_continuous(type ="viridis", 
                        trans="log",
                        name="Count",
                        breaks=c(1, 10, 100, 1000)) +
  scale_y_continuous(name="Gene Pair Overlap",
                     breaks=seq(0, 9)) +
  scale_x_continuous(name="target similarity (best alignment)",
                     breaks=seq(0, 9)) +
  facet_wrap(~BIT_FLIP_PROB)

Exclude 0 overlap

ggplot(filter(env_sim_data_drift, gene_pair_overlap > 0),
       aes(x=gene_pair_target_max_alignment_similarity, 
           y=gene_pair_overlap)) +
  geom_bin2d(binwidth=1) +
  scale_fill_continuous(type ="viridis", 
                        trans="log",
                        name="Count",
                        breaks=c(1, 10, 100, 1000)) +
  scale_y_continuous(name="Gene Pair Overlap",
                     breaks=seq(0, 9)) +
  scale_x_continuous(name="target similarity (best alignment)",
                     breaks=seq(0, 9)) +
  facet_wrap(~BIT_FLIP_PROB)

Static Environment

ggplot(env_sim_data_selection_static,
       aes(x=gene_pair_target_max_alignment_similarity, 
           y=gene_pair_overlap)) +
  geom_bin2d(binwidth=1) +
  scale_fill_continuous(type ="viridis", 
                        trans="log",
                        name="Count",
                        breaks=c(1, 10, 100, 1000, 10000)) +
  scale_y_continuous(name="Gene Pair Overlap",
                     breaks=seq(0, 9)) +
  scale_x_continuous(name="target similarity (best alignment)",
                     breaks=seq(0, 9)) +
  facet_wrap(~BIT_FLIP_PROB)

ggplot(filter(env_sim_data_selection_static, gene_pair_overlap > 0),
       aes(x=gene_pair_target_max_alignment_similarity, 
           y=gene_pair_overlap)) +
  geom_bin2d(binwidth=1) +
  scale_fill_continuous(type ="viridis", 
                        trans="log",
                        name="Count",
                        breaks=c(1, 10, 100, 1000, 10000)) +
  scale_y_continuous(name="Gene Pair Overlap",
                     breaks=seq(0, 9)) +
  scale_x_continuous(name="target similarity (best alignment)",
                     breaks=seq(0, 9)) +
  facet_wrap(~BIT_FLIP_PROB)

Overlap level proportions for each gene target similarity

Compute proportions

mut_rates <- levels(env_sim_data$BIT_FLIP_PROB)
# chg_rates <- levels(env_sim_data$chg_rate_label)
chg_rates <- c("drift", "0")
gene_overlap_levels <-
  levels(env_sim_data$gene_pair_overlap_factor)
gene_target_best_alignment_sim_levels <- 
  levels(env_sim_data$gene_pair_target_max_alignment_similarity_factor)

overlap_proportions <- 
  read.csv(text="mutation_rate,chg_rate_label,gene_target_sim_level,gene_overlap_level,gene_overlap_prop,gene_overlap_cnt")
for (mut_rate in mut_rates) {
  for (chg_rate in chg_rates) {
    for (sim in gene_target_best_alignment_sim_levels) {
      # For each similarity level, compute proportion for each level of gene overlap
      d <- filter(env_sim_data, 
                  BIT_FLIP_PROB==mut_rate &
                    chg_rate_label==chg_rate &
                    gene_pair_target_max_alignment_similarity_factor==sim)
      total <- length(d$update) # How many total observations at this similarity level?
      for (overlap in gene_overlap_levels) {
        overlap_proportion <- 0
        # How many are at this overlap level?
        overlap_cnt <- length(filter(d, gene_pair_overlap_factor==overlap)$update)
        if (total > 0) {
          overlap_proportion <- overlap_cnt / total
        } else {
          overlap_proportion <- 0
        }
        new_row <- data.frame(mutation_rate=c(mut_rate),
                              chg_rate_label=c(chg_rate),
                              gene_target_sim_level=c(sim),
                              gene_overlap_level=c(overlap),
                              gene_overlap_prop=c(overlap_proportion),
                              gene_overlap_cnt=c(overlap_cnt))
        overlap_proportions <- rbind(overlap_proportions, new_row)
      }
    }
  }
}
ggplot(overlap_proportions, 
       aes(x=gene_target_sim_level, y=gene_overlap_level, fill=gene_overlap_prop)) +
  geom_tile() +
  xlab("Gene Target Similarity (best alignment)") +
  scale_fill_continuous(type ="viridis") +
  facet_grid(mutation_rate~chg_rate_label) + 
  ggsave("./imgs/gene_overlap_proportions.pdf", width=10, height=20)

ggplot(overlap_proportions, 
       aes(x=gene_target_sim_level, y=gene_overlap_prop, fill=gene_overlap_level)) +
  geom_bar(position="fill", stat="identity") +
  xlab("Gene Target Similarity (best alignment)") +
  ylab("Proportion of Pairs") +
  scale_fill_discrete(name="Gene Overlap Amount") +
  facet_grid(mutation_rate~chg_rate_label) + 
  theme(legend.position = "top") +
  ggsave("./imgs/gene_overlap_proportions_stackedbar.pdf", width=10, height=20)
## Warning: Removed 90 rows containing missing values (geom_bar).

## Warning: Removed 90 rows containing missing values (geom_bar).

Proportion of non-zero overlap

### Collect proportion non-zero

overlap_proportions_non0 <- 
  read.csv(text="mutation_rate,chg_rate_label,gene_target_sim_level,gene_overlap_level,gene_overlap_prop,gene_overlap_cnt")
for (mut_rate in mut_rates) {
  for (chg_rate in chg_rates) {
    for (sim in gene_target_best_alignment_sim_levels) {
      # For each similarity level, compute proportion for each level of gene overlap
      d <- filter(env_sim_data, 
                  BIT_FLIP_PROB==mut_rate &
                    chg_rate_label==chg_rate &
                    gene_pair_target_max_alignment_similarity_factor==sim &
                    gene_pair_overlap!=0)
      total <- length(d$update) # How many total observations at this similarity level?
      for (overlap in gene_overlap_levels) {
        if (overlap=="0") { next() }
        overlap_proportion <- 0
        # How many are at this overlap level?
        overlap_cnt <- length(filter(d, gene_pair_overlap_factor==overlap)$update)
        if (total > 0) {
          overlap_proportion <- overlap_cnt / total
        } else {
          overlap_proportion <- 0
        }
        new_row <- data.frame(mutation_rate=c(mut_rate),
                              chg_rate_label=c(chg_rate),
                              gene_target_sim_level=c(sim),
                              gene_overlap_level=c(overlap),
                              gene_overlap_prop=c(overlap_proportion),
                              gene_overlap_cnt=c(overlap_cnt))
        overlap_proportions_non0 <- rbind(overlap_proportions_non0, new_row)
      }
    }
  }
}
ggplot(overlap_proportions_non0, 
       aes(x=gene_target_sim_level, y=gene_overlap_level, fill=gene_overlap_prop)) +
  geom_tile() +
  xlab("Gene Target Similarity (best alignment)") +
  scale_fill_continuous(type ="viridis") +
  facet_grid(mutation_rate~chg_rate_label) + 
  ggsave("./imgs/gene_overlap_proportions_non0.pdf", width=10, height=20)

ggplot(overlap_proportions_non0, 
       aes(x=gene_target_sim_level, y=gene_overlap_prop, fill=gene_overlap_level)) +
  geom_bar(position="fill", stat="identity") +
  xlab("Gene Target Similarity (best alignment)") +
  scale_fill_viridis(discrete = TRUE) +
  facet_grid(mutation_rate~chg_rate_label) +
  ggsave("./imgs/gene_overlap_proportions_stackedbar_non0.pdf", width=10, height=20)
## Warning: Removed 152 rows containing missing values (geom_bar).

## Warning: Removed 152 rows containing missing values (geom_bar).